Analysis date: 2023-08-09

Depends on

CRC_Xenografts_Batch2_DataProcessing Script

# Restore the objects cached by the upstream data-processing script; load()
# places them directly into the current environment.
load("../Data/Cache/Xenografts_Batch2_DataProcessing.RData")
# Fix the RNG seed so the random starts used by kmeans() further down are
# reproducible between runs.
set.seed(2023)

Setup

Load libraries

Functions

General

source("../../../General/Code/Analysis_Functions.R")
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::collapse()     masks IRanges::collapse()
## ✖ dplyr::combine()      masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count()        masks matrixStats::count()
## ✖ dplyr::desc()         masks IRanges::desc()
## ✖ tidyr::expand()       masks S4Vectors::expand()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ dplyr::first()        masks S4Vectors::first()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ ggplot2::Position()   masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()       masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename()       masks S4Vectors::rename()
## ✖ lubridate::second()   masks S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::select()       masks AnnotationDbi::select()
## ✖ dplyr::slice()        masks IRanges::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
source("CRC_Xenografts_Batch2_Functions.R")

StringDB

# Plot the STRING interaction network for a set of gene symbols.
#
# hits: a character vector or a single-column table of HGNC symbols.
# db:   object exposing `$map()` and `$plot_network()`. Defaults to the
#       `string_db` object that must already exist in the session, so
#       existing calls `Plot_StringDB(hits)` keep working unchanged.
#
# Rows whose symbol cannot be mapped are dropped (removeUnmappedRows = TRUE)
# before plotting. Returns whatever `db$plot_network()` returns.
Plot_StringDB <- function(hits, db = string_db) {
  hits <- as.data.frame(hits)
  # The rename below supplies a single name; with more than one column the
  # table would be silently mislabelled, so fail loudly instead.
  if (ncol(hits) != 1L) {
    stop("`hits` must be a vector or a single-column table of HGNC symbols.",
         call. = FALSE)
  }
  colnames(hits) <- "HGNC_Symbol"
  hits_mapped <- db$map(hits, "HGNC_Symbol", removeUnmappedRows = TRUE)
  db$plot_network(hits_mapped)
}

k-means Peptide - pY

Format data

# Wide matrix for kmeans(): one row per peptide, named
# "<HGNC_Symbol>_<Annotated_Sequence>", and one column per log2FC column.
# contains() is a selection helper and is used directly; all_of() is meant
# for external character/numeric vectors and added nothing here.
mat_kmean_pY <-
  pY_Set3_form %>%
  mutate(peptide = paste0(HGNC_Symbol, "_", Annotated_Sequence)) %>%
  column_to_rownames("peptide") %>%
  select(contains("log2FC")) %>%
  as.matrix()

# Long table for plotting: one row per peptide x sample. The sample column
# name is split on "_" into its six parts; the original `sample` column is
# kept (remove = FALSE) because it is used as the x axis later.
kmeans_tb_pY <-
  pY_Set3_form %>%
  select(HGNC_Symbol, Annotated_Sequence, contains("log2FC")) %>%
  pivot_longer(contains("log2FC"), names_to = "sample", values_to = "log2FC") %>%
  separate(
    col = sample, sep = "_",
    into = c("remove", "xenograft", "treatment",
             "timepoint", "replicate", "set"),
    remove = FALSE
  ) %>%
  # Same key as the matrix row names, so cluster labels can be joined back.
  mutate(peptide = paste0(HGNC_Symbol, "_", Annotated_Sequence))

Find optimal number of clusters

# Elbow diagnostic for the pY matrix. The helper is defined outside this
# file; c_max = 30 is presumably the largest cluster count tried (confirm
# against the helper's definition).
KMeans_Find_Nr_Clusters_elbow(mat_kmean_pY, c_max = 30)

Perform k-means

# k-means on the pY matrix with 6 centres and 200 random starts. The fit is
# stored as `pY_kmeans`; it was previously named `pST_kmeans`, which
# mislabelled pY results and was later overwritten by the pST fit.
pY_kmeans <- kmeans(mat_kmean_pY, centers = 6, nstart = 200, iter.max = 10)

# One row per peptide with its cluster label; the names of the cluster
# vector are the matrix row names, i.e. the `peptide` key.
cluster_df_pY <- tibble(
  peptide = names(pY_kmeans$cluster),
  cluster = pY_kmeans$cluster
)

# Join on an explicit key so a future shared column cannot silently change
# the match.
kmeans_tb_pY <- left_join(kmeans_tb_pY, cluster_df_pY, by = "peptide")
## Joining with `by = join_by(peptide)`

Plot profiles

# Per-cluster log2FC profiles for pY peptides: one line per peptide across
# samples, points coloured by treatment.
# Lines are grouped by `peptide` (symbol + sequence), the same key used for
# clustering, so a sequence shared by two symbols is not drawn as one line.
kmeans_tb_pY %>%
  ggplot(aes(sample, log2FC, group = peptide)) +
  geom_line(alpha = 0.2) +
  geom_point(aes(color = treatment), size = 0.2) +
  facet_wrap(~cluster) +
  theme_bw() +
  # Sample names are long; rotate them so they stay legible.
  theme(axis.text.x = element_text(angle = 90)) +
  scale_color_manual(values = PGPalette[c(1, 2, 4, 5)])

StringDB clusters

# Label the network plotted next in the knitted output.
message("Cluster 3")
## Cluster 3
# STRING network for the unique gene symbols assigned to pY cluster 3.
# NOTE(review): k-means labels are arbitrary; "3" refers to the cluster
# numbering of this particular seeded run.
Plot_StringDB(
  kmeans_tb_pY %>%
    filter(cluster == 3) %>%
    distinct(HGNC_Symbol)
)

k-means Peptide - pST

Format data

# Wide matrix for kmeans(): one row per peptide, named
# "<HGNC_Symbol>_<Annotated_Sequence>", and one column per log2FC column.
# contains() is a selection helper and is used directly; all_of() is meant
# for external character/numeric vectors and added nothing here.
mat_kmean_pST <-
  pST_Set3_form %>%
  mutate(peptide = paste0(HGNC_Symbol, "_", Annotated_Sequence)) %>%
  column_to_rownames("peptide") %>%
  select(contains("log2FC")) %>%
  as.matrix()

# Long table for plotting: one row per peptide x sample. The sample column
# name is split on "_" into its six parts; the original `sample` column is
# kept (remove = FALSE) because it is used as the x axis later.
kmeans_tb_pST <-
  pST_Set3_form %>%
  select(HGNC_Symbol, Annotated_Sequence, contains("log2FC")) %>%
  pivot_longer(contains("log2FC"), names_to = "sample", values_to = "log2FC") %>%
  separate(
    col = sample, sep = "_",
    into = c("remove", "xenograft", "treatment",
             "timepoint", "replicate", "set"),
    remove = FALSE
  ) %>%
  # Same key as the matrix row names, so cluster labels can be joined back.
  mutate(peptide = paste0(HGNC_Symbol, "_", Annotated_Sequence))

Find optimal number of clusters

# Elbow diagnostic for the pST matrix. The helper is defined outside this
# file; c_max = 30 is presumably the largest cluster count tried (confirm
# against the helper's definition).
KMeans_Find_Nr_Clusters_elbow(mat_kmean_pST, c_max = 30)

Perform k-means

# k-means on the pST matrix with 3 centres and 200 random starts.
pST_kmeans <- kmeans(mat_kmean_pST, centers = 3, nstart = 200, iter.max = 10)

# One row per peptide with its cluster label; the names of the cluster
# vector are the matrix row names, i.e. the `peptide` key.
cluster_df_pST <- tibble(
  peptide = names(pST_kmeans$cluster),
  cluster = pST_kmeans$cluster
)

# Join on an explicit key so a future shared column cannot silently change
# the match.
kmeans_tb_pST <- left_join(kmeans_tb_pST, cluster_df_pST, by = "peptide")
## Joining with `by = join_by(peptide)`

Plot profiles

# Per-cluster log2FC profiles for pST peptides: one line per peptide across
# samples, points coloured by treatment, three facets per row.
# Lines are grouped by `peptide` (symbol + sequence), the same key used for
# clustering, so a sequence shared by two symbols is not drawn as one line.
kmeans_tb_pST %>%
  ggplot(aes(sample, log2FC, group = peptide)) +
  geom_line(alpha = 0.2) +
  geom_point(aes(color = treatment), size = 0.2) +
  facet_wrap(~cluster, ncol = 3) +
  theme_bw() +
  # Sample names are long; rotate them so they stay legible.
  theme(axis.text.x = element_text(angle = 90)) +
  scale_color_manual(values = PGPalette[c(1, 2, 4, 5)])

Session Info

# Record the R version and package versions used for this analysis.
sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] lubridate_1.9.2             forcats_1.0.0              
##  [3] stringr_1.5.0               dplyr_1.1.2                
##  [5] purrr_1.0.1                 readr_2.1.4                
##  [7] tidyr_1.3.0                 tibble_3.2.1               
##  [9] tidyverse_2.0.0             cluster_2.1.4              
## [11] factoextra_1.0.7            ggplot2_3.4.2              
## [13] mdatools_0.14.0             SummarizedExperiment_1.28.0
## [15] GenomicRanges_1.50.2        GenomeInfoDb_1.34.9        
## [17] MatrixGenerics_1.10.0       matrixStats_1.0.0          
## [19] org.Hs.eg.db_3.16.0         AnnotationDbi_1.60.2       
## [21] IRanges_2.32.0              S4Vectors_0.36.2           
## [23] Biobase_2.58.0              BiocGenerics_0.44.0        
## [25] fgsea_1.24.0               
## 
## loaded via a namespace (and not attached):
##  [1] STRINGdb_2.10.1        bitops_1.0-7           bit64_4.0.5           
##  [4] RColorBrewer_1.1-3     httr_1.4.6             backports_1.4.1       
##  [7] tools_4.2.3            bslib_0.5.0            DT_0.28               
## [10] utf8_1.2.3             R6_2.5.1               KernSmooth_2.23-22    
## [13] DBI_1.1.3              colorspace_2.1-0       withr_2.5.0           
## [16] tidyselect_1.2.0       bit_4.0.5              compiler_4.2.3        
## [19] chron_2.3-61           cli_3.6.1              DelayedArray_0.24.0   
## [22] labeling_0.4.2         sass_0.4.7             caTools_1.18.2        
## [25] scales_1.2.1           digest_0.6.33          rmarkdown_2.23        
## [28] XVector_0.38.0         pkgconfig_2.0.3        htmltools_0.5.5       
## [31] plotrix_3.8-2          highr_0.10             fastmap_1.1.1         
## [34] htmlwidgets_1.6.2      rlang_1.1.1            rstudioapi_0.15.0     
## [37] RSQLite_2.3.1          farver_2.1.1           jquerylib_0.1.4       
## [40] generics_0.1.3         jsonlite_1.8.7         crosstalk_1.2.0       
## [43] BiocParallel_1.32.6    gtools_3.9.4           RCurl_1.98-1.12       
## [46] magrittr_2.0.3         GenomeInfoDbData_1.2.9 Matrix_1.6-0          
## [49] Rcpp_1.0.11            munsell_0.5.0          fansi_1.0.4           
## [52] proto_1.0.0            lifecycle_1.0.3        sqldf_0.4-11          
## [55] stringi_1.7.12         yaml_2.3.7             zlibbioc_1.44.0       
## [58] gplots_3.1.3           plyr_1.8.8             grid_4.2.3            
## [61] blob_1.2.4             parallel_4.2.3         ggrepel_0.9.3         
## [64] crayon_1.5.2           lattice_0.21-8         Biostrings_2.66.0     
## [67] cowplot_1.1.1          hash_2.2.6.2           hms_1.1.3             
## [70] KEGGREST_1.38.0        knitr_1.43             pillar_1.9.0          
## [73] igraph_1.5.0.1         codetools_0.2-19       fastmatch_1.1-3       
## [76] glue_1.6.2             evaluate_0.21          data.table_1.14.8     
## [79] tzdb_0.4.0             png_0.1-8              vctrs_0.6.3           
## [82] gtable_0.3.3           gsubfn_0.7             cachem_1.0.8          
## [85] xfun_0.39              broom_1.0.5            memoise_2.0.1         
## [88] timechange_0.2.0       ellipsis_0.3.2
# Stop knitting here; anything after this point in the source document is
# not rendered.
knitr::knit_exit()